Integrate Tregs¶

Load libraries and set files paths¶

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(dplyr)
    library(data.table)
    library(ggplot2)
    library(patchwork)
    library(harmony)
    library(entropy)
    library(presto)
    library(singlecellmethods)
    library(lme4)
    library(purrr)
    library(pheatmap)
    library(rcna)
    library(glue)
    library(ggthemes)
})

source("myfun.r")
In [2]:
# path list
allDat.dir <- "/data/brennerlab/Shani/projects/Treg/analysis/"

# AMP data options
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_orig.rds") # AMP original Tregs annotations
amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds") # Tregs after additional QC filtering 
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_filtered_refined.rds") # Refined Treg annotations (permissive)
# amp2.path <- paste0(allDat.dir, "AMP2_RA_tissue/AMP2_tissue_Treg_alternative.rds") # My alternative Treg annotation

amp2.extra.metadata.path <- "/data/brennerlab/Shani/projects/AMP_Phase_2/treatment assigned metadata_clin_donor_singlecell.csv"
amprep.path <- paste0(allDat.dir, "AMPrep_syn_bl/AMPrep_ST_BL_filtered_Tregs.rds")
sf.path <- paste0(allDat.dir, "SF_sf_bl/SF_BL_filtered.rds")

# ctrl.bl.path <- paste0(allDat.dir, "Ctrl_bl/Ctrl_BL_filtered_Tregs.rds")
ctrl.bl.path <- paste0(allDat.dir, "Ctrl_bl/Ctrl_BL_Luo_filtered_Tregs.rds")

#TODO: control blood

save.path <- "/data/brennerlab/Shani/projects/Treg/analysis/integrated/"

Load all dataset¶

In [3]:
amp2 <- readRDS(amp2.path)
amprep <- readRDS(amprep.path)
sf <- readRDS(sf.path)
ctrl.bl <- readRDS(ctrl.bl.path)

unify metadata¶

AMP2 dataset¶

In [4]:
amp2@meta.data%>% colnames()
amp2@meta.data %>% head(2)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'sample'
  5. 'cluster'
  6. 'cell_type'
  7. 'donor'
  8. 'fibro'
  9. 'Tfilter'
  10. 'nUMI'
  11. 'nGene'
  12. 'percent_mito'
  13. 'Tfilter2'
  14. 'cluster_number'
  15. 'cluster_name'
  16. 'nCount_ADT'
  17. 'nFeature_ADT'
  18. 'age'
  19. 'sex'
  20. 'treatment'
  21. 'CDAI'
  22. 'disease.duration'
  23. 'tissue.type'
  24. 'krenn'
  25. 'subject_id'
  26. 'CTAP'
  27. 'UMAP_1.amp'
  28. 'UMAP_2.amp'
  29. 'humap_fgraph_res.0.5'
  30. 'seurat_clusters'
  31. 'humap_fgraph_res.0.8'
  32. 'humap_fgraph_res.1'
  33. 'sub.cluster'
A data.frame: 2 × 33
orig.identnCount_RNAnFeature_RNAsampleclustercell_typedonorfibroTfilternUMI⋯krennsubject_idCTAPUMAP_1.ampUMAP_2.amphumap_fgraph_res.0.5seurat_clustershumap_fgraph_res.0.8humap_fgraph_res.1sub.cluster
<fct><dbl><int><chr><chr><chr><chr><lgl><lgl><dbl>⋯<dbl><chr><chr><dbl><dbl><fct><fct><fct><fct><chr>
BRI-399_AAAGGTAGTGCAGGATBRI-39941011492BRI-399T-8: CD4+ CD25-high TregT cellBRI-399TRUEFALSE4101⋯NA301-0267NA-1.5639392.57749533422
BRI-399_AACGGGATCTTGTTACBRI-39935681454BRI-399T-8: CD4+ CD25-high TregT cellBRI-399TRUEFALSE3568⋯NA301-0267NA-1.6300561.67256444944
In [5]:
amp2.extra.metadata <- read.csv(amp2.extra.metadata.path)
# amp2.extra.metadata <- amp2.extra.metadata %>% mutate(subject_id = gsub("-", "_", subject_id)) 
amp2.extra.metadata%>% head
A data.frame: 6 × 16
subject_idpipeline_datesitetreatmentbiopsiedsexageCDAIdisease_durationtissue_typekrenn_inflammationsingle_cell_techcell_count_to_10xmRNA_runprotein_runatac_run
<chr><chr><chr><chr><chr><chr><int><chr><dbl><chr><dbl><chr><chr><chr><chr><chr>
1301-02678/6/2019University of PittsburghOA Female70 NAArthroplasty NACITEseq + flow/bulk + re-freeze15,000BRI-399BRI-400BRI-448
2300-03028/6/2019Cedars Naïve Knee Female7715 2.1Biopsy 2.33CITEseq 15,000BRI-401BRI-402
3300-01508/6/2019University of Rochester methotrexate failureWristFemale2822 1.5Synovectomy 2.67CITEseq + flow/bulk + re-freeze15,000BRI-403BRI-404BRI-449
4300-03108/6/2019Cedars TNF failure WristFemale6224.516.0Biopsy 0.33CITEseq 2,200 BRI-405BRI-406
5300-26638/6/2019Columbia University methotrexate failureWristFemale4440 6.9Biopsy 3.00CITEseq 3,800 BRI-407BRI-408
6300-04608/6/2019Northwestern methotrexate failureWristFemale6811 3.8Biopsy 3.00CITEseq 5,000 BRI-409BRI-410
In [6]:
amp2.meta <- amp2@meta.data %>% select(donor) %>% left_join(., amp2.extra.metadata%>% select(c(mRNA_run, biopsied)),by = join_by(donor == mRNA_run))
amp2$biopsied <- amp2.meta$biopsied
amp2@meta.data[2434:2436,]
A data.frame: 3 × 34
orig.identnCount_RNAnFeature_RNAsampleclustercell_typedonorfibroTfilternUMI⋯subject_idCTAPUMAP_1.ampUMAP_2.amphumap_fgraph_res.0.5seurat_clustershumap_fgraph_res.0.8humap_fgraph_res.1sub.clusterbiopsied
<fct><dbl><int><chr><chr><chr><chr><lgl><lgl><dbl>⋯<chr><chr><dbl><dbl><fct><fct><fct><fct><chr><chr>
BRI-515_TTGGTTTCACGGTGTCBRI-5151849 901BRI-515T-9: CD4+ CD25-low Treg T cellBRI-515FALSETRUE1849⋯300-0504T + F-1.7371601.26887700000Ankle
BRI-515_TTGTGTTCAAATTGCCBRI-51558841923BRI-515T-8: CD4+ CD25-high TregT cellBRI-515FALSETRUE5884⋯300-0504T + F-2.2151381.91983022311Ankle
BRI-515_TTGTGTTTCGCGCCAABRI-51545251311BRI-515T-8: CD4+ CD25-high TregT cellBRI-515FALSETRUE4525⋯300-0504T + F-1.6212912.51676600011Ankle
In [7]:
amp2$orig.ident <- "AMP2"
amp2$sample.library <- amp2$sample
amp2$donorID <- amp2$subject_id
amp2$donorID <- gsub("-", "_", amp2$donorID) 
amp2$percent.mito <- amp2$percent_mito

amp2$tissue <- "Syn"
amp2$tissue <- ifelse(amp2$treatment == "OA", "Syn_OA", amp2$tissue)
amp2$tissue_collection <- amp2$tissue.type
In [8]:
amp2@meta.data <- amp2@meta.data %>% select(c(-cluster, -cell_type, -donor, -fibro, -Tfilter, -nUMI, -nGene, -Tfilter2, -tissue.type, -subject_id, 
                                              -UMAP_1.amp, -UMAP_2.amp, -humap_fgraph_res.0.8, -seurat_clusters, -percent_mito)) #
colnames(amp2@meta.data)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'sample'
  5. 'cluster_number'
  6. 'cluster_name'
  7. 'nCount_ADT'
  8. 'nFeature_ADT'
  9. 'age'
  10. 'sex'
  11. 'treatment'
  12. 'CDAI'
  13. 'disease.duration'
  14. 'krenn'
  15. 'CTAP'
  16. 'humap_fgraph_res.0.5'
  17. 'humap_fgraph_res.1'
  18. 'sub.cluster'
  19. 'biopsied'
  20. 'sample.library'
  21. 'donorID'
  22. 'percent.mito'
  23. 'tissue'
  24. 'tissue_collection'

AMP rep dataset¶

In [9]:
amprep@meta.data%>% colnames()
amprep@meta.data %>% head(2)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'nCount_ADT'
  5. 'nFeature_ADT'
  6. 'study_ID'
  7. 'AMP_ID'
  8. 'method'
  9. 'tissue_location'
  10. 'age'
  11. 'sex'
  12. 'ethnicity'
  13. 'race'
  14. 'RA_duration'
  15. 'krenn_inflammation'
  16. 'cdai_V0'
  17. 'CTAP'
  18. 'pathotype'
  19. 'treatment_group'
  20. 'site'
  21. 'library'
  22. 'sampleName'
  23. 'donorID'
  24. 'tissue'
  25. 'ct'
  26. 'seq_repeat'
  27. 'percent.mito'
  28. 'scDblFinder.class'
  29. 'scDblFinder.score'
  30. 'sub.cluster'
  31. 'humap_fgraph_res.0.5'
  32. 'seurat_clusters'
  33. 'humap_fgraph_res.0.8'
A data.frame: 2 × 33
orig.identnCount_RNAnFeature_RNAnCount_ADTnFeature_ADTstudy_IDAMP_IDmethodtissue_locationage⋯tissuectseq_repeatpercent.mitoscDblFinder.classscDblFinder.scoresub.clusterhumap_fgraph_res.0.5seurat_clustershumap_fgraph_res.0.8
<chr><dbl><int><dbl><int><chr><chr><chr><chr><int>⋯<chr><chr><chr><dbl><fct><dbl><chr><fct><fct><fct>
300_0150_PBL_BT_5_AACACGTAGCGTTCCG-1300_0150_PBL_BT_534584516625995RA01300_0150SynovectomyWrist28⋯PBLBT13.336803singlet0.18873795914344
300_0150_PBL_BT_5_AACTCAGCATCGACGC-1300_0150_PBL_BT_5 4841170311086RA01300_0150SynovectomyWrist28⋯PBLBT12.210287singlet0.0068593297 211
In [10]:
amprep$orig.ident <- "AMPrep"
amprep$sample.library <- amprep$sampleName
amprep$donorID <- amprep$AMP_ID
amprep$treatment <- amprep$treatment_group
amprep$tissue_collection <- amprep$method
amprep$biopsied <- amprep$tissue_location
amprep$disease.duration <- amprep$RA_duration
amprep$krenn <- amprep$krenn_inflammation
amprep$CDAI <- amprep$cdai_V0

amprep$tissue <- ifelse(amprep$tissue == "PBL", "PBMC.RA", amprep$tissue)
In [11]:
amprep@meta.data <- amprep@meta.data %>% select(c(-study_ID, -scDblFinder.class, -scDblFinder.score, -RA_duration, -krenn_inflammation, -cdai_V0, -treatment_group, 
                                              -site, -library, -sampleName, -ct, -humap_fgraph_res.0.8, -seurat_clusters, -AMP_ID))
colnames(amprep@meta.data)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'nCount_ADT'
  5. 'nFeature_ADT'
  6. 'method'
  7. 'tissue_location'
  8. 'age'
  9. 'sex'
  10. 'ethnicity'
  11. 'race'
  12. 'CTAP'
  13. 'pathotype'
  14. 'donorID'
  15. 'tissue'
  16. 'seq_repeat'
  17. 'percent.mito'
  18. 'sub.cluster'
  19. 'humap_fgraph_res.0.5'
  20. 'sample.library'
  21. 'treatment'
  22. 'tissue_collection'
  23. 'biopsied'
  24. 'disease.duration'
  25. 'krenn'
  26. 'CDAI'

sf dataset¶

In [12]:
sf@meta.data%>% colnames()
sf@meta.data %>% head(2)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'nCount_ADT'
  5. 'nFeature_ADT'
  6. 'nCount_HTO'
  7. 'nFeature_HTO'
  8. 'sampleID'
  9. 'percent.mito'
  10. 'HTO_maxID'
  11. 'HTO_secondID'
  12. 'HTO_margin'
  13. 'HTO_classification'
  14. 'HTO_classification.global'
  15. 'hash.ID'
  16. 'Diagnosis'
  17. 'donorID'
  18. 'tissue'
  19. 'sorted.population'
  20. 'soting.marker'
  21. 'humap_fgraph_res.1'
  22. 'seurat_clusters'
  23. 'scDblFinder.class'
  24. 'scDblFinder.score'
  25. 'sub.cluster'
  26. 'humap_fgraph_res.0.8'
A data.frame: 2 × 26
orig.identnCount_RNAnFeature_RNAnCount_ADTnFeature_ADTnCount_HTOnFeature_HTOsampleIDpercent.mitoHTO_maxID⋯donorIDtissuesorted.populationsoting.markerhumap_fgraph_res.1seurat_clustersscDblFinder.classscDblFinder.scoresub.clusterhumap_fgraph_res.0.8
<chr><dbl><int><dbl><int><dbl><int><chr><dbl><chr>⋯<chr><chr><chr><chr><fct><fct><fct><dbl><chr><fct>
BRI-1426__AAACCTGAGAGTAAGG-1SF47001697610 991695BRI-14265.319149Hashtag5⋯DiSF013 (aka P012)SFMCR2 (Treg)CD127-00singlet0.03570022130
BRI-1426__AAACCTGAGGCAGTCA-1SF622321897031013235BRI-14261.462317Hashtag5⋯DiSF013 (aka P012)SFMCR2 (Treg)CD127-99singlet0.00233565619
In [13]:
sf$orig.ident <- "SF.BL"
sf$sample.library <- sf$sampleID
sf$tissue <- ifelse(sf$tissue == "PBMC", "PBMC.RA", sf$tissue)
In [14]:
sf@meta.data <- sf@meta.data %>% select(c(-Diagnosis, -sorted.population, -soting.marker, -humap_fgraph_res.0.8, -seurat_clusters, -sampleID))
colnames(sf@meta.data)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'nCount_ADT'
  5. 'nFeature_ADT'
  6. 'nCount_HTO'
  7. 'nFeature_HTO'
  8. 'percent.mito'
  9. 'HTO_maxID'
  10. 'HTO_secondID'
  11. 'HTO_margin'
  12. 'HTO_classification'
  13. 'HTO_classification.global'
  14. 'hash.ID'
  15. 'donorID'
  16. 'tissue'
  17. 'humap_fgraph_res.1'
  18. 'scDblFinder.class'
  19. 'scDblFinder.score'
  20. 'sub.cluster'
  21. 'sample.library'

Control blood dataset¶

In [15]:
ctrl.bl@meta.data%>% colnames()
ctrl.bl@meta.data%>% head(2)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'donorID'
  5. 'sex'
  6. 'age'
  7. 'sequencing.technology'
  8. 'library'
  9. 'celltype'
  10. 'disease'
  11. 'sample_name'
  12. 'tissue'
  13. 'percent.mito'
  14. 'humap_fgraph_res.1'
  15. 'seurat_clusters'
  16. 'scDblFinder.class'
  17. 'scDblFinder.score'
  18. 'humap_fgraph_res.0.8'
  19. 'humap_fgraph_res.0.5'
A data.frame: 2 × 19
orig.identnCount_RNAnFeature_RNAdonorIDsexagesequencing.technologylibrarycelltypediseasesample_nametissuepercent.mitohumap_fgraph_res.1seurat_clustersscDblFinder.classscDblFinder.scorehumap_fgraph_res.0.8humap_fgraph_res.0.5
<chr><dbl><int><chr><chr><int><chr><chr><chr><chr><chr><chr><dbl><fct><fct><fct><dbl><fct><fct>
AAACCTGAGACGCTTT-1_2_1_1HD_Luo_HD436691391HD4M45scRNA_VDJ5SRR14664412_GSM5342394TregHDSRR14664412_GSM5342394_Treg_HD_PB4PB2.8345620singlet0.00645407420
AAACCTGAGAGTAAGG-1_2_1_1HD_Luo_HD425131033HD4M45scRNA_VDJ5SRR14664412_GSM5342394TregHDSRR14664412_GSM5342394_Treg_HD_PB4PB5.1731020singlet0.02627710820
In [16]:
cellnames <- colnames(ctrl.bl)
mt <- ctrl.bl@meta.data %>% mutate(new.names = paste0(tissue, "-", donorID, "_", gsub("_.*", "", rownames(.)))) %>% with(new.names)
ctrl.bl <- RenameCells(ctrl.bl, new.names = mt)
ctrl.bl@meta.data%>% head(2)
A data.frame: 2 × 19
orig.identnCount_RNAnFeature_RNAdonorIDsexagesequencing.technologylibrarycelltypediseasesample_nametissuepercent.mitohumap_fgraph_res.1seurat_clustersscDblFinder.classscDblFinder.scorehumap_fgraph_res.0.8humap_fgraph_res.0.5
<chr><dbl><int><chr><chr><int><chr><chr><chr><chr><chr><chr><dbl><fct><fct><fct><dbl><fct><fct>
PB-HD4_AAACCTGAGACGCTTT-1HD_Luo_HD436691391HD4M45scRNA_VDJ5SRR14664412_GSM5342394TregHDSRR14664412_GSM5342394_Treg_HD_PB4PB2.8345620singlet0.00645407420
PB-HD4_AAACCTGAGAGTAAGG-1HD_Luo_HD425131033HD4M45scRNA_VDJ5SRR14664412_GSM5342394TregHDSRR14664412_GSM5342394_Treg_HD_PB4PB5.1731020singlet0.02627710820
In [17]:
ctrl.bl@meta.data <- ctrl.bl@meta.data %>% mutate(sex = gsub("F", "Female", .$sex)) %>% mutate(sex = gsub("M", "Male", .$sex))
In [18]:
ctrl.bl$tissue <- "PBMC.Ctrl"
ctrl.bl$donorID <- ctrl.bl$donorID
ctrl.bl$sample.library <- ctrl.bl$sample_name
ctrl.bl$orig.ident <- "HD_Luo"
In [19]:
ctrl.bl@meta.data <- ctrl.bl@meta.data %>% select(c(-sequencing.technology, -library, -celltype, -disease, -sample_name, -humap_fgraph_res.1, -seurat_clusters, 
                                                    -humap_fgraph_res.0.8, -humap_fgraph_res.0.5))
In [20]:
colnames(ctrl.bl@meta.data)
  1. 'orig.ident'
  2. 'nCount_RNA'
  3. 'nFeature_RNA'
  4. 'donorID'
  5. 'sex'
  6. 'age'
  7. 'tissue'
  8. 'percent.mito'
  9. 'scDblFinder.class'
  10. 'scDblFinder.score'
  11. 'sample.library'

Merge seurat object¶

In [21]:
merged <- merge(amp2, c(amprep, sf, ctrl.bl))
In [22]:
merged
An object of class Seurat 
38406 features across 39332 samples within 3 assays 
Active assay: RNA (38224 features, 0 variable features)
 2 layers present: counts, data
 2 other assays present: ADT, HTO
In [23]:
table(merged$orig.ident)
table(merged$tissue)
  AMP2 AMPrep HD_Luo  SF.BL 
  5368   3298  25847   4819 
PBMC.Ctrl   PBMC.RA      SFMC       Syn    Syn_OA 
    25847      4053      1854      7229       349 

Removing OA samples¶

In [24]:
Idents(merged) <- "tissue"
merged <- subset(merged, ident = "Syn_OA", invert = TRUE)
In [25]:
merged
An object of class Seurat 
38406 features across 38983 samples within 3 assays 
Active assay: RNA (38224 features, 0 variable features)
 2 layers present: counts, data
 2 other assays present: ADT, HTO
In [26]:
table(merged$orig.ident)
table(merged$tissue)
  AMP2 AMPrep HD_Luo  SF.BL 
  5019   3298  25847   4819 
PBMC.Ctrl   PBMC.RA      SFMC       Syn 
    25847      4053      1854      7229 

basic QC¶

In [27]:
fig.size(5, 8)
#mito %
merged[["percent.mito"]] <- PercentageFeatureSet(merged, pattern = "^MT-")
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
fig.size(5, 20)
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)
In [28]:
#genes num
fig.size(5, 8)
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("genes number") 
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("genes number") 
fig.size(5, 20)
VlnPlot(merged, features = "nFeature_RNA", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("genes number")
In [29]:
#counts num
fig.size(5, 8)
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "orig.ident") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("counts number") 
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "tissue") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("counts number") 
fig.size(5, 20)
VlnPlot(merged, features = "nCount_RNA", pt.size = 0.0, group.by = "donorID") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("counts number")
In [30]:
merged@meta.data%>% head(2)
A data.frame: 2 × 40
orig.identnCount_RNAnFeature_RNAsamplecluster_numbercluster_namenCount_ADTnFeature_ADTagesex⋯nCount_HTOnFeature_HTOHTO_maxIDHTO_secondIDHTO_marginHTO_classificationHTO_classification.globalhash.IDscDblFinder.classscDblFinder.score
<chr><dbl><int><chr><chr><chr><dbl><int><int><chr>⋯<dbl><int><chr><chr><dbl><chr><chr><chr><chr><dbl>
BRI-401_AAACGCTCAATACCCAAMP249801693BRI-401T-8T-8: CD4+ CD25-high Treg12904677Female⋯NANANANANANANANANANA
BRI-401_AAAGGATCACACACGCAMP254351869BRI-401T-8T-8: CD4+ CD25-high Treg 3524377Female⋯NANANANANANANANANANA

Preprocessing¶

In [31]:
merged <- merged %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            ScaleData() %>% 
            RunBalancedPCA(weight.by = "tissue", npcs = 50)
            # RunPCA(verbose = T)
Centering and scaling data matrix

In [32]:
# I don't re-normalize/scale protein data as not all datasets has this information!
# merged <- merged %>% 
#             NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") #%>% 
#             # ScaleData(assay = "ADT")
In [ ]:
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T)
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T)
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F) + theme(legend.position = "none")
In [ ]:
fig.size(4,5)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T, dims = c(1,3))
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T, dims = c(1,3))
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F, dims = c(1,3)) + theme(legend.position = "none")
In [ ]:
fig.size(4,4)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T, dims = c(1,4))
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T, dims = c(1,4))
In [ ]:
fig.size(4,20)
DimPlot(merged, reduction = "pca", group.by = "tissue", shuffle = T, label = F, split.by = "orig.ident")
In [37]:
barplot(table(merged$donorID))
In [ ]:
# compare the duplicated donors (from datasets AMP2 and AMPrep, to look for the variable of variation

dup.donor <- merged@meta.data%>% select(donorID, orig.ident)%>% unique() %>% arrange(donorID)%>% count(donorID)%>% filter(n>1)%>% .$donorID
fig.size(5,5)
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "donorID",shuffle = T, label = F) + theme(legend.position = "none")
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "orig.ident",shuffle = T, label = F)
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "donorID",shuffle = T, label = F, dims = 2:3) + theme(legend.position = "none")
DimPlot(merged, reduction = "pca", cells = colnames(merged)[merged$donorID %in% dup.donor], group.by = "orig.ident",shuffle = T, label = F, dims = 2:3)

Looks like: PC1 separates experiment PC2 separates tissue PC3 separates experiment, too

Harmony¶

In [39]:
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"), 
                     reduction.use = "pca", 
                     plot_convergence = TRUE, 
                     early_stop = F)


fig.size(5,5)

ElbowPlot(merged, ndims = 50, reduction = "harmony") 
ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony 4/10

Harmony 5/10

Harmony 6/10

Harmony 7/10

Harmony 8/10

Harmony 9/10

Harmony 10/10

In [ ]:
fig.size(5,5)
DimPlot(merged, reduction = "harmony", group.by = "orig.ident",shuffle = T) 
DimPlot(merged, reduction = "harmony", group.by = "tissue",shuffle = T)
fig.size(5,15)
DimPlot(merged, reduction = "harmony", group.by = "donorID",shuffle = T, label = F)
In [41]:
# run umap
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
                                  resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 38983
Number of edges: 474203

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6913
Number of communities: 10
Elapsed time: 3 seconds
In [42]:
fig.size(5, 6)

red.use <- "humap"
DimPlot(merged, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use,group.by = "orig.ident", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))

fig.size(6, 6)
DimPlot(merged, reduction = red.use,group.by = "sex", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
In [43]:
fig.size(8, 8)
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 10")
In [44]:
fig.size(12,12)
FeaturePlot(merged, reduction = red.use,features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "MKI67", "CD4", "CD8A", "CCR7", "TIGIT"))
In [45]:
fig.size(5, 20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "seurat_clusters", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [ ]:
fig.size(8,6)
VlnPlot(merged, features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), group.by = "tissue", ncol = 1, pt.size = 0.00)
In [47]:
table(merged$seurat_clusters)
table(merged$seurat_clusters, merged$orig.ident)
table(merged$seurat_clusters, merged$tissue)
   0    1    2    3    4    5    6    7    8    9 
8982 8703 6670 5993 3482 2517 1175  795  400  266 
   
    AMP2 AMPrep HD_Luo SF.BL
  0  337    415   7346   884
  1 1576    840   5423   864
  2  766    492   4376  1036
  3  664    492   4088   749
  4  336    309   2278   559
  5  932    263   1095   227
  6  322    404    252   197
  7   26     50    611   108
  8   60     27    213   100
  9    0      6    165    95
   
    PBMC.Ctrl PBMC.RA SFMC  Syn
  0      7346    1021  117  498
  1      5423     893  212 2175
  2      4376     743  461 1090
  3      4088     649  279  977
  4      2278     246  394  564
  5      1095     212   81 1129
  6       252      81  172  670
  7       611      95   38   51
  8       213      46   70   71
  9       165      67   30    4
In [48]:
fig.size(4,10)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0) + 
    scale_fill_tableau("Tableau 20")

Downsampling HD PBMC dataset and remerge¶

In [49]:
median(table(merged$orig.ident)[c(1,2,4)])
max(table(merged$orig.ident)[c(1,2,4)])

median(table(merged$tissue)[2:4])
max(table(merged$tissue)[2:4])
4819
5019
4053
7229
In [50]:
n <- max(table(merged$tissue)[2:4])
n
set.seed(1)
ctrl.bl.subsampled <- ctrl.bl[, sample(colnames(ctrl.bl), size = n, replace=F)]
7229
In [51]:
merged <- merge(amp2, c(amprep, sf, ctrl.bl.subsampled))

merged
table(merged$tissue)
table(merged$orig.ident)

Idents(merged) <- "tissue"
merged <- subset(merged, ident = "Syn_OA", invert = TRUE)
merged
An object of class Seurat 
38406 features across 20714 samples within 3 assays 
Active assay: RNA (38224 features, 0 variable features)
 2 layers present: counts, data
 2 other assays present: ADT, HTO
PBMC.Ctrl   PBMC.RA      SFMC       Syn    Syn_OA 
     7229      4053      1854      7229       349 
  AMP2 AMPrep HD_Luo  SF.BL 
  5368   3298   7229   4819 
An object of class Seurat 
38406 features across 20365 samples within 3 assays 
Active assay: RNA (38224 features, 0 variable features)
 2 layers present: counts, data
 2 other assays present: ADT, HTO

Preprocessing¶

In [52]:
merged <- merged %>% 
            NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>% 
            FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
            ScaleData() %>% 
            # RunBalancedPCA(weight.by = "tissue")
            RunPCA(verbose = T)
Centering and scaling data matrix

PC_ 1 
Positive:  RBFOX2, APOO, FP236383.3, LTB, MT-ATP8, OOEP, STEAP1B, SLC35F1, RPS10-NUDT3, PTPRCAP 
	   BAIAP2L1, FP671120.4, MALAT1, AC004448.2, PLAC8, RPLP0, CEACAM4, FXYD2, MT-ND3, AL136456.1 
	   FCER1G, TEX14, AIF1, SNED1, ANK2, TRAV8-2, CRIP2, FAAH2, AQP3, TRBV5-1 
Negative:  TYMS, MKI67, RRM2, PCLAF, UBE2C, BIRC5, TOP2A, CDK1, NUSAP1, TK1 
	   CCNA2, KIFC1, CDT1, PKMYT1, TPX2, ASF1B, CLSPN, CENPF, AURKB, CDCA5 
	   ZWINT, GTSE1, UHRF1, MYBL2, ASPM, STMN1, SPC25, DLGAP5, HIST1H1B, CENPW 
PC_ 2 
Positive:  JUND, IGKC, AES, MT-ATP6, MT-ND4, RPS10, SEPT7, SEPT6, KIAA1551, MALAT1 
	   FOSB, XIST, MT-ND1, MT-ND2, RARRES3, MT-CO3, IGLC2, NEAT1, FOS, TRAC 
	   POLR2J3.1, MINOS1, C6orf48, SEPT9, CREM, FAM96B, IGHG3, ZNF331, FAM129A, SOCS3 
Negative:  MT-ATP8, APOO, RBFOX2, PTPRCAP, RPLP0, NME2, FP236383.3, MIF, PFN1, PPIA 
	   OOEP, STEAP1B, SLC35F1, CNN2, RPS10-NUDT3, ACTB, AC004448.2, BAIAP2L1, STMN1, TUBB 
	   RRM2, FP671120.4, HMGN2, UBE2C, MKI67, TYMS, PKMYT1, CORO1A, CCNA2, PCLAF 
PC_ 3 
Positive:  HLA-A, IL32, CD74, CRIP1, PFN1, S100A4, GAPDH, ACTB, LGALS3, FOXP3 
	   HLA-DRB5, ARPC1B, HLA-B, MYL6, LINC01943, ANXA2, TMSB10, S100A10, MT1E, CTSC 
	   MT2A, MIF, LINC01578, TAGLN2, UCP2, S100A6, LY6E, TNFRSF18, HLA-DRB1, MT1X 
Negative:  MT-ND3, MT-ND1, MT-ND4, RPS10, IGKC, MT-CYB, MT-CO3, AES, MALAT1, MT-CO2 
	   MT-ND2, C6orf48, KIAA1551, SEPT6, UBE2C, RARRES3, SEPT7, IGLC2, TOP2A, GTSE1 
	   CCNA2, BIRC5, MKI67, RRM2, DLGAP5, AURKB, POLR2J3.1, IGHG3, ASPM, KIFC1 
PC_ 4 
Positive:  JUNB, NR4A2, DUSP1, ZFP36, FOS, CSRNP1, DUSP2, JUN, ZNF331, PPP1R15A 
	   TNFAIP3, SLC2A3, NR4A3, DNAJB1, FOSB, CDKN1A, CD69, RGCC, NR4A1, CXCR4 
	   GEM, YPEL5, NFKBIA, KLF6, BTG2, PTGER4, AREG, TUBB4B, SERTAD1, CREM 
Negative:  MT-CO3, GNAI2, AES, SEPT7, SEPT6, GNAS, FAM129A, TMSB4X, KIAA1551, TRAC 
	   MBD2, POLR2J3.1, MT-CO2, MINOS1, CD81, FAM96B, SEPT1, MALAT1, HNRNPD, MZT2B 
	   UBALD2, SEPT9, C8orf59, RPS10, C19orf70, MT-ND4, C15orf53, FAM89B, SEPT2, MT-CYB 
PC_ 5 
Positive:  GINS2, MCM2, E2F1, CDC45, CDC6, MCM7, UHRF1, DTL, MCM4, CDT1 
	   MCM5, TYMS, CLSPN, PAQR4, TK1, PCNA, FEN1, MCM6, GZMH, NKG7 
	   PCLAF, MCM10, HELLS, FAM111B, CENPU, MCM3, CHEK1, UNG, DHFR, MATK 
Negative:  PLK1, CDC20, DLGAP5, ASPM, KIF14, CENPA, HMMR, CCNB1, UBE2C, TROAP 
	   CDCA3, CENPE, CCNB2, GTSE1, CENPF, CDCA8, KIF20A, BIRC5, AURKB, TOP2A 
	   KIF23, TPX2, KIF2C, CDCA2, PRR11, MTRNR2L12, UBALD2, CKAP2L, AURKA, MXD3 

In [53]:
fig.size(5,5)
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"), 
                     reduction.use = "pca", 
                     plot_convergence = TRUE, 
                     early_stop = T)

# ElbowPlot(merged, ndims = 50, reduction = "harmony") 
# ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony converged after 3 iterations

In [54]:
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
                                  resolution = 0.8, verbose = TRUE)


fig.size(5, 6)

# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use <- "humap"
DimPlot(merged, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use,group.by = "orig.ident", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))

fig.size(6, 6)
DimPlot(merged, reduction = red.use,group.by = "sex", label = F, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 20365
Number of edges: 251239

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7047
Number of communities: 9
Elapsed time: 1 seconds
In [55]:
fig.size(8, 8)
DimPlot(merged, reduction = red.use,group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 10")
In [56]:
fig.size(12,12)
FeaturePlot(merged, reduction = red.use,features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "MKI67", "CD4", "CD8A", "CCR7", "TIGIT"))
In [57]:
fig.size(5, 20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "seurat_clusters", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [58]:
fig.size(4,10)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00000001) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0) + 
    scale_fill_tableau("Tableau 20")

Cluster markers¶

In [59]:
Idents(merged) <- "seurat_clusters"
Tregs.markers <- wilcoxauc(merged, "seurat_clusters")
In [60]:
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)
m.show <- Tregs.markers %>%
    group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 30, order_by = logFC)%>% 
    select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature)

# m.show %>% write.csv(paste0(save.path, "integrated.cluster.markers.csv"))
m.show[1:25,]
A tibble: 25 × 10
row012345678
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1GAS5 CD74 RPL17 FOS CD74 STMN1 MT-ATP6 TRBV7-6 ISG15
2EEF1G IL32 TXNIP JUNB SRGN TUBA1B MT-ND4 TRBV7-4 IFI6
3MT-ATP8 S100A4 IFITM1 JUND S100A4 TUBB MT-ND2 TRBV7-9 MX1
4APOO HLA-DRB1EEF1G JUN MT2A HMGB2 MALAT1 TRBV7-2 MT2A
5SNHG29 HLA-DRB5RBFOX2 FOSB CD2 TYMS IGKC IFITM1 LY6E
6EEF1B2 HLA-DPA1TLE5 NR4A2 TNFRSF18CD74 MT-ND1 EEF1G OAS1
7RPL17 HLA-DPB1PTPRCAPZNF331 GAPDH GZMA MT-ND3 RPL17 STAT1
8TCF7 LGALS1 MT-ND4LDUSP1 S100A11 CCL5 MT-CO3 TRBV7-5 ISG20
9RPS12 CRIP1 RPL36A MT-ATP6 LGALS1 GAPDH NEAT1 TLE5 XAF1
10RPS13 ANXA2 RPL8 RPS10 MYL6 HMGB1 MT-ND5 TXNIP OASL
11RBFOX2 FOXP3 GSTK1 DUSP2 CRIP1 MKI67 MT-CYB RPS4Y1 EIF2AK2
12FP236383.3CYTOR LDHB IGKC COTL1 H2AFZ XIST GAS5 HERC5
13RPL32 CLIC1 EEF1A1 YPEL5 TNFRSF4 MT2A MT-CO1 NME2 IFIT3
14SNHG6 LGALS3 RPS8 CREM ARPC1B HLA-DRA MT-CO2 EEF2 IFIT1
15LEF1 SH3BGRL3RPL34 BTG2 LGALS3 COTL1 SYNE2 FXYD5 EPSTI1
16TXNIP GBP5 RPL30 MT-ND4 LAG3 HMGN2 RNF213 RESF1 RNF213
17MT-ND4L S100A10 RPS27 CD69 CLIC1 DUT KIAA1551 LINC01578TRIM22
18RPS5 HLA-A EEF2 DNAJA1 KLRB1 NKG7 TRAC PLAAT4 MX2
19CCR7 S100A6 RPS21 PPP1R15APKM DEK N4BP2L2 IL2RG SAT1
20PRKCQ-AS1 CTSC RPS12 SRGN ENO1 HLA-DRB1POLR2J3.1SEPTIN1 IFITM1
21RPL30 LSP1 RPL32 CXCR4 UCP2 HIST1H4CNKTR PFN1 CD74
22RPS8 HLA-DRA RPS3 RGS1 TNFRSF1BLGALS1 PCSK7 EEF1A1 SP110
23RPL36A ARPC1B RPL39 LMNA CTSC PCLAF DDX17 NA SP100
24RPL5 ISG20 RPS15A TNFAIP3 IL2RG H2AFV SEPT6 NA IFI44L
25GIMAP7 MYL6 RPL10 SOCS3 CTLA4 TPI1 IGHM NA IRF7
In [61]:
#Eexplore TRBV7 genes
trbv7 <- rownames(merged@assays$RNA@scale.data)[grepl("TRBV7", rownames(merged@assays$RNA@scale.data))]
trbv <- rownames(merged@assays$RNA@scale.data)[grepl("TRBV", rownames(merged@assays$RNA@scale.data))]

trav <- rownames(merged@assays$RNA@scale.data)[grepl("TRAV", rownames(merged@assays$RNA@scale.data))]

trbv7
trbv
trav
  1. 'TRBV7-2'
  2. 'TRBV7-3'
  3. 'TRBV7-4'
  4. 'TRBV7-5'
  5. 'TRBV7-6'
  6. 'TRBV7-7'
  7. 'TRBV7-9'
  1. 'TRBV1'
  2. 'TRBV2'
  3. 'TRBV3-1'
  4. 'TRBV4-1'
  5. 'TRBV5-1'
  6. 'TRBV6-1'
  7. 'TRBV4-2'
  8. 'TRBV6-2'
  9. 'TRBV7-2'
  10. 'TRBV6-4'
  11. 'TRBV7-3'
  12. 'TRBV5-3'
  13. 'TRBV9'
  14. 'TRBV10-1'
  15. 'TRBV11-1'
  16. 'TRBV12-1'
  17. 'TRBV10-2'
  18. 'TRBV11-2'
  19. 'TRBV12-2'
  20. 'TRBV6-5'
  21. 'TRBV7-4'
  22. 'TRBV5-4'
  23. 'TRBV6-6'
  24. 'TRBV7-5'
  25. 'TRBV5-5'
  26. 'TRBV6-7'
  27. 'TRBV7-6'
  28. 'TRBV5-6'
  29. 'TRBV6-8'
  30. 'TRBV7-7'
  31. 'TRBV5-7'
  32. 'TRBV7-9'
  33. 'TRBV13'
  34. 'TRBV10-3'
  35. 'TRBV11-3'
  36. 'TRBV12-3'
  37. 'TRBV12-4'
  38. 'TRBV12-5'
  39. 'TRBV14'
  40. 'TRBV15'
  41. 'TRBV16'
  42. 'TRBV18'
  43. 'TRBV19'
  44. 'TRBV20-1'
  45. 'TRBV21-1'
  46. 'TRBV23-1'
  47. 'TRBV24-1'
  48. 'TRBV25-1'
  49. 'TRBV27'
  50. 'TRBV28'
  51. 'TRBV29-1'
  52. 'TRBV30'
  1. 'TRAV1-1'
  2. 'TRAV1-2'
  3. 'TRAV2'
  4. 'TRAV3'
  5. 'TRAV4'
  6. 'TRAV5'
  7. 'TRAV6'
  8. 'TRAV8-1'
  9. 'TRAV9-1'
  10. 'TRAV10'
  11. 'TRAV12-1'
  12. 'TRAV8-2'
  13. 'TRAV8-3'
  14. 'TRAV13-1'
  15. 'TRAV12-2'
  16. 'TRAV8-4'
  17. 'TRAV8-5'
  18. 'TRAV13-2'
  19. 'TRAV14DV4'
  20. 'TRAV9-2'
  21. 'TRAV12-3'
  22. 'TRAV8-6'
  23. 'TRAV16'
  24. 'TRAV17'
  25. 'TRAV19'
  26. 'TRAV20'
  27. 'TRAV21'
  28. 'TRAV22'
  29. 'TRAV23DV6'
  30. 'TRAV24'
  31. 'TRAV25'
  32. 'TRAV26-1'
  33. 'TRAV27'
  34. 'TRAV29DV5'
  35. 'TRAV30'
  36. 'TRAV26-2'
  37. 'TRAV34'
  38. 'TRAV35'
  39. 'TRAV36DV7'
  40. 'TRAV38-1'
  41. 'TRAV38-2DV8'
  42. 'TRAV39'
  43. 'TRAV40'
  44. 'TRAV41'
In [62]:
fig.size(10, 10)
FeaturePlot(merged, features = trbv7, ncol = 3)
In [63]:
fig.size(10, 10)
dat.plot <- FetchData(merged, vars = c(trbv), slot = "scale.data")
dat.meta <- FetchData(merged, vars = c("seurat_clusters", "tissue", "orig.ident"), slot = "data")
dat.meta <- dat.meta%>% arrange(seurat_clusters, tissue, orig.ident)
pheatmap(dat.plot[rownames(dat.meta),], scale = "none", cluster_rows = F, cluster_cols = T, show_rownames = F, 
         annotation_row = dat.meta)
Warning message:
“The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.”
In [64]:
fig.size(10, 10)
dat.plot <- FetchData(merged, vars = c(trbv7), cells = rownames(merged@meta.data)[merged$seurat_clusters == 7], slot = "scale.data")
dat.meta <- FetchData(merged, , cells = rownames(merged@meta.data)[merged$seurat_clusters == 7], vars = c("seurat_clusters", "tissue", "orig.ident"), slot = "data")
dat.meta <- dat.meta%>% arrange(seurat_clusters, tissue, orig.ident)
pheatmap(dat.plot[rownames(dat.meta),], scale = "none", cluster_rows = F, cluster_cols = T, show_rownames = F, 
         annotation_row = dat.meta)
In [65]:
table(merged$seurat_clusters, merged$orig.ident)
table(merged$seurat_clusters, merged$tissue)
# table(merged$seurat_clusters[merged$seurat_clusters == 8], merged$donorID[merged$seurat_clusters == 8], merged$tissue[merged$seurat_clusters == 8])
   
    AMP2 AMPrep HD_Luo SF.BL
  0  473    523   2967  1223
  1  792    573   1279  1334
  2  388    471   2107   993
  3 2374    949    216   198
  4  354    285    404   579
  5  286    400     45   179
  6  290     49     73    74
  7    0     31     98   141
  8   62     17     40    98
   
    PBMC.Ctrl PBMC.RA SFMC  Syn
  0      2967    1537   65  617
  1      1279     870  671 1158
  2      2107    1062  174  616
  3       216     127  160 3234
  4       404     172  477  569
  5        45      78  160  627
  6        73      64   29  320
  7        98     106   48   18
  8        40      37   70   70

Subclustering¶

In [66]:
Idents(merged) <- "seurat_clusters"
merged <- FindSubCluster(merged, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 4, resolution = 0.3, graph.name = "humap_fgraph")
Idents(merged) <- "sub.cluster"
merged <- FindSubCluster(merged, cluster = 5, resolution = 0.3, graph.name = "humap_fgraph")

fig.size(10,10)
DimPlot(merged, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20)) +
    scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 3737
Number of edges: 37545

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7440
Number of communities: 3
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 910
Number of edges: 9917

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8293
Number of communities: 4
Elapsed time: 0 seconds
In [ ]:
fig.size(15,15)
VlnPlot(merged, features = c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD8A", "MKI67", "TCF7"), group.by = "sub.cluster", ncol = 1, pt.size = 0.00000001)
In [68]:
table(merged$sub.cluster)
# table(merged$sub.cluster, merged$orig.ident)
   0    1    2  3_0  3_1  3_2    4  5_0  5_1  5_2  5_3    6    7    8 
5186 3978 3959 1835 1319  583 1622  382  210  198  120  486  270  217 
In [ ]:
fig.size(5,20)
DimPlot(merged, reduction = red.use, split.by = "tissue", group.by = "sub.cluster", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
DimPlot(merged, reduction = red.use, split.by = "orig.ident", group.by = "sub.cluster", repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [70]:
fig.size(5,5)
#mito %
VlnPlot(merged, features = "percent.mito", pt.size = 0.0, group.by = "sub.cluster") + theme(legend.position = "none") + ggtitle(NULL) +
  xlab(NULL) + ylab("% mitochondrial genes") + geom_hline(yintercept=15)

Remove non Treg clusters:¶

5_2, 5_0 - non FOXP3 proliferating 5_3 - CD8+ proliferating 7 - contamination clusters from VDJ sequencing - an artifact coming from only the datasets that were generated by VDJ sequencing, and expressing high levels of TRBV7 genes. Therefore is removed. 6 - high mt

In [71]:
Idents(merged) <- "sub.cluster"
merged <- subset(merged, idents = c("5_0", "5_2", "5_3", 6, 7), invert = T)
In [72]:
merged
An object of class Seurat 
38406 features across 18909 samples within 3 assays 
Active assay: RNA (38224 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 2 other assays present: ADT, HTO
 3 dimensional reductions calculated: pca, harmony, humap

Redo preproccessing¶

In [73]:
merged <- NormalizeData(merged, normalization.method = "LogNormalize", scale.factor = 10000) %>% 
        FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% 
        ScaleData() %>% 
        # RunBalancedPCA(weight.by = "tissue", npc = 50)
        RunPCA(verbose = F)
Centering and scaling data matrix

In [74]:
# merged <- NormalizeData(merged, normalization.method = "CLR", margin = 2, assay = "ADT") %>% 
            # ScaleData(assay = "ADT")
In [ ]:
fig.size(5,5)
DimPlot(merged, reduction = "pca", group.by = "orig.ident",shuffle = T)
DimPlot(merged, reduction = "pca", group.by = "tissue",shuffle = T)
fig.size(5,15)
DimPlot(merged, reduction = "pca", group.by = "donorID",shuffle = T, label = F)

Harmony¶

In [76]:
fig.size(5,5)
merged <- RunHarmony(merged, group.by.vars=c("orig.ident", "donorID", "tissue"), #theta = c(2,2,1),
                     reduction.use = "pca", 
                     plot_convergence = TRUE, 
                     early_stop = T)


ElbowPlot(merged, ndims = 50, reduction = "harmony") 
ElbowPlot(merged, ndims = 50, reduction = "pca")
Transposing data matrix

Initializing state using k-means centroids initialization

Harmony 1/10

Harmony 2/10

Harmony 3/10

Harmony converged after 3 iterations

In [77]:
# Run UMAP
set.seed(1)
merged <- Run_uwot_umap(merged, min_dist = 0.5, spread = 1.5)
merged <- FindClusters(merged, graph.name = 'humap_fgraph',
                                  resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 18909
Number of edges: 233331

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7140
Number of communities: 10
Elapsed time: 1 seconds
In [78]:
fig.size(6,7)

Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6) + 
    scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
DimPlot(merged, group.by = "tissue", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_fill_tableau("Tableau 20")
DimPlot(merged, group.by = "orig.ident", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20))
In [ ]:
fig.size(5,15)

Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6, split.by = "tissue") + 
    scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
In [80]:
fig.size(6,7)
FeaturePlot(merged, features = "FOXP3", order = F)
FeaturePlot(merged, features = "IL2RA", order = F)
FeaturePlot(merged, features = "AREG", order = F)
FeaturePlot(merged, features = "CXCR6", order = F)
FeaturePlot(merged, features = "TCF7", order = F)
FeaturePlot(merged, features = "CCR7", order = F)
In [ ]:
fig.size(5, 20)
FeaturePlot(merged, features = "AREG", order = T, split.by = "tissue")
FeaturePlot(merged, features = "AREG", order = F, split.by = "tissue")

FeaturePlot(merged, features = "CXCR6", order = F, split.by = "tissue")
FeaturePlot(merged, features = "CCR7", order = F, split.by = "tissue")
FeaturePlot(merged, features = "TCF7", order = F, split.by = "tissue")
In [82]:
fig.size(5, 20)
DimPlot(merged, split.by = "tissue", group.by = "tissue", repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, split.by = "orig.ident", group.by = "orig.ident",repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(merged, split.by = "tissue", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6) + 
    theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [83]:
fig.size(12, 18)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "seurat_clusters"))
umap.bg <- umap.plot %>% select(-seurat_clusters)

ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", alpha = 0.2, size = 0.01) + 
    geom_point(aes(color = seurat_clusters), size = 0.01) +
    facet_wrap(~seurat_clusters) + 
    scale_color_tableau("Tableau 20") + theme_bw(base_size = 18)
# DimPlot(merged, split.by = "seurat_clusters", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6, ncol =5) + 
    # theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [ ]:
fig.size(10, 12)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "tissue"))
umap.bg <- umap.plot %>% select(-tissue)

ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", size = 0.1) + 
    geom_point(color = "black", size = 0.1) +
    facet_wrap(~tissue) + 
    scale_color_tableau("Tableau 20") + theme_bw(base_size = 24)
# DimPlot(merged, split.by = "seurat_clusters", group.by = "seurat_clusters",repel = T, shuffle = T, label.size = 6, ncol =5) + 
    # theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
In [85]:
fig.size(3,8)
VlnPlot(merged, features = "FOXP3", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "seurat_clusters", ncol = 1, pt.size = 0.0) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "TCF7", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CCR7", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CD8A", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CD8B", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
fig.size(3, 12)
VlnPlot(merged, features = "AREG", group.by = "seurat_clusters", split.by = "tissue", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
The default behaviour of split.by has changed.
Separate violin plots are now plotted side-by-side.
To restore the old behaviour of a single split violin,
set split.plot = TRUE.
      
This message will be shown once per session.

Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Try subclustering¶

In [86]:
Idents(merged) <- "seurat_clusters"
merged <- FindSubCluster(merged, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")
# merged$seurat_clusters[merged$seurat_clusters == 7] <- 1 # cluster 7 is 4 cells in total. 
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 1, resolution = 0.3, graph.name = "humap_fgraph")
# Idents(merged) <- "sub.cluster"
# merged <- FindSubCluster(merged, cluster = 5, resolution = 0.3, graph.name = "humap_fgraph")

fig.size(10,10)
DimPlot(merged, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + 
    theme(text = element_text(size = 20)) +
    scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 2972
Number of edges: 30148

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7555
Number of communities: 3
Elapsed time: 0 seconds
In [ ]:
fig.size(12, 18)
umap.plot <- FetchData(merged, vars = c("HUMAP_1", "HUMAP_2", "sub.cluster"))
umap.bg <- umap.plot %>% select(-sub.cluster)

ggplot(umap.plot, aes(x = HUMAP_1, y = HUMAP_2)) + geom_point(data = umap.bg, color = "grey", alpha = 0.2, size = 0.01) + 
    geom_point(aes(color = sub.cluster), size = 0.01) +
    facet_wrap(~sub.cluster) + 
    scale_color_tableau("Tableau 20") + theme_bw(base_size = 18)
In [88]:
fig.size(3,8)
VlnPlot(merged, features = "FOXP3", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "IL2RA", group.by = "sub.cluster", ncol = 1, pt.size = 0.0) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "TCF7", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CCR7", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "CXCR6", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "AREG", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
VlnPlot(merged, features = "MKI67", group.by = "sub.cluster", ncol = 1, pt.size = 0.00) + 
    scale_fill_tableau("Tableau 20")
In [89]:
table(merged$sub.cluster, merged$orig.ident)
table(merged$sub.cluster, merged$tissue)
     
      AMP2 AMPrep HD_Luo SF.BL
  0    287    487   2911  1144
  1    338    470   1500  1171
  2    144    371   1746   813
  3_0 1125    408     91    40
  3_1  815    280     16    62
  3_2  116     10      2     7
  4    661    323     93   668
  5    827    321     37   120
  6     60    104    492   221
  7     60     18     49   120
  8     66     85     35    50
  9     13     22     54    56
     
      PBMC.Ctrl PBMC.RA SFMC  Syn
  0        2911    1464   50  404
  1        1500     958  416  605
  2        1746     903  118  307
  3_0        91      61   30 1482
  3_1        16      19   50 1088
  3_2         2       3    4  126
  4          93      82  615  955
  5          37      36  117 1115
  6         492     172  112  101
  7          49      41   90   67
  8          35      15   46  140
  9          54      58   15   18
In [90]:
fig.size(5, 20)
FeaturePlot(merged, "FOXP3", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "IL2RA", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "AREG", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "CXCR6", split.by = "tissue", pt.size = 0.01)
FeaturePlot(merged, "MKI67", split.by = "tissue", pt.size = 0.01)

Find Cluster markers¶

In [91]:
Tregs.markers <- wilcoxauc(merged, "seurat_clusters")
write.csv(Tregs.markers, paste0(save.path, "Integrated.Treg.Cluster.Markers.Wilcox.allRes.csv"))
In [92]:
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

Tregs.markers %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> Treg.show
write.csv(Treg.show, paste0(save.path, "Integrated.Treg.Cluster.Markers.Wilcox.filt.csv"))
Treg.show[1:40,]
A tibble: 40 × 11
row0123456789
<int><chr><chr><chr><chr><chr><chr><chr><chr><chr><chr>
1GAS5 CD74 IFITM1 FOS TNFRSF18 FOS AQP3 ISG15 STMN1 GZMK
2MT-ATP8 IL32 MT-ATP8 JUND CD74 JUND CRIP1 MX1 GAPDH IFNG-AS1
3EEF1G HLA-DRB5 RPL17 MT-ATP6 MT2A JUN TLE5 IFI6 COTL1 GZMA
4APOO HLA-DRB1 EEF1G JUNB SRGN LMNA CNN2 LY6E TUBA1B CNN2
5SNHG29 CRIP1 TXNIP FOSB TNFRSF4 FOSB KLRB1 MT2A TYMS GIMAP7
6RPL17 S100A4 RBFOX2 IGKC LGALS1 JUNB COTL1 XAF1 CD74 IL10RA
7EEF1B2 HLA-DPB1 MT-ND4L RPS10 CD2 CREM CD52 ISG20 ENO1 CXCR3
8TCF7 HLA-DPA1 APOO NR4A2 MTRNR2L12MT-ATP6 S100A4 STAT1 DUT LYAR
9MT-ND4L ANXA2 RPS4Y1 JUN CTSC DUSP1 PFN1 OAS1 LGALS1 FCMR
10RBFOX2 SH3BGRL3 TLE5 ZNF331 S100A4 RGS1 S100A11 EPSTI1 SLC25A5PLAAT4
11FP236383.3TLE5 FP236383.3MT-ND4 HLA-DRB1 TNFAIP3 LIME1 EIF2AK2 PGAM1 FOXP1
12PRKCQ-AS1 LIME1 SNHG29 DUSP1 GAPDH SRGN PTPRCAP TRIM22 TPI1 GIMAP4
13RPS12 LGALS1 RPL36A MT-ND1 MT-ATP6 NR4A2 ANXA1 RNF213 PFN1 LIME1
14SNHG6 CYTOR SNHG6 YPEL5 ENO1 PPP1R15ASH3BGRL3HERC5 MYL6 IKZF3
15CCR7 LSP1 PTPRCAP MT-ND2 CTLA4 KLF6 ARPC1B MTRNR2L12CRIP1 CST7
16LEF1 S100A10 RPL8 DUSP2 IL32 ZNF331 LIMS1 MX2 PKM FYB1
17RPS13 PFN1 LDHB CD69 MYL6 DUSP2 ANXA2 CD74 TUBB VAMP2
18TXNIP CD52 SELL DNAJA1 LGALS3 DNAJA1 NOSIP SP100 ACTB GIMAP5
19RPL36A GBP5 LIME1 BTG2 BATF NEAT1 LAPTM5 SAMD9L ARPC1B RPS15A
20IFITM1 ITGB1 RPS8 SOCS3 TYMP HSP90AA1CD99 UBE2L6 LDHA NA
21RPL32 SELPLG EEF1A1 CXCR4 LINC01943YPEL5 GPR25 OASL MCM5 NA
22RPS5 EMP3 EEF2 SOCS1 TIGIT BTG2 SEPTIN9 IFITM1 PCLAF NA
23GIMAP7 S100A6 RPS21 SRGN CLIC1 MT-ND4 CORO1A IFIT3 PCNA NA
24RPS8 HLA-A GSTK1 PPP1R15ANEAT1 RPS10 UCP2 SAT1 MT2A NA
25GIMAP5 ARHGDIB RPL32 MT-ND3 TNFRSF1B MYADM PPP1CA BST2 VIM NA
26RPL30 CLIC1 RPL34 CREM SPOCK2 DUSP4 IL2RG SP110 DEK NA
27RPL5 ARPC1B RPS12 HSP90AA1TIMP1 SAT1 EMP3 ADAR MCM7 NA
28PABPC1 HLA-C RPLP0 AREG CST7 CD69 CAPZB IFIT1 ACTG1 NA
29RPS21 ALOX5AP RPL30 MT-CO3 LAG3 RGCC CD6 FOXP3 GZMA NA
30FOXP1 COTL1 RPS5 RGS1 HLA-A IGKC CD2 IRF7 RANBP1 NA
31RPL39 LINC02694RPL39 MALAT1 LMNA CXCR4 LSP1 TYMP H2AFZ NA
32RPL35A TTC39C RPS3 SLC2A3 PKM ZFP36 S100A6 IFI16 HLA-DRANA
33RPS3 FOXP3 RPL4 AES HLA-DPA1 SELENOK MYH9 LGALS9 HMGN2 NA
34RPL34 CD99 RPL5 RPS20 HLA-DRB5 UBC ACTG1 SYNE2 SNRPB NA
35RPS14 NIBAN1 PFN1 RGCC S100A11 AES ARHGDIB MT1E TXN NA
36RPS6 LGALS3 RPS15A DDX3X DUSP4 TENT5C ACTB UBALD2 HMGB1 NA
37RPL37 DOK2 RPS13 XIST JUND PHLDA1 ITGB1 IFI35 ANXA5 NA
38RPS23 ACTB RPS27 TRAC RNF213 NFKBIA CCR6 SMCHD1 ANXA2 NA
39RPS4Y1 HPGD RPS25 STAT3 FOXP3 TUBB4B FLNA GBP5 CCL5 NA
40RPS25 RAC2 RPL37 NEAT1 HLA-DRA IRF1 OSTF1 HLA-A S100A11NA
In [93]:
fig.size(6,7)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6) + 
    scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
In [94]:
fig.size(4,12)
Idents(merged) <- "seurat_clusters"
DimPlot(merged, label = T, repel = T, shuffle = T, label.size = 6, split.by = "tissue") + 
    scale_color_tableau("Tableau 20") + theme(text = element_text(size = 20))
In [105]:
dot.dat <- FetchData(merged, vars = c("IL2RA", "seurat_clusters", "donorID"), slot = "data") %>%  group_by(donorID, seurat_clusters) %>% 
    mutate(Mean=mean(IL2RA)) %>% 
    ungroup() %>% 
    group_by(seurat_clusters) %>% 
    mutate(n = n()) %>% 
    mutate(Mean=mean(IL2RA)) %>% 
    mutate(SD = sd(IL2RA)/sqrt(n)) %>% 
    mutate(zscore = (IL2RA - Mean)/SD) #
dot.dat %>% head
ggplot(dot.dat, aes(x = seurat_clusters, y = zscore)) + geom_boxplot() #+ geom_linerange(aes(ymin = mean_z-sd_z, ymax = mean_z+sd_z))

# dot.dat <- dot.dat %>% 
#     group_by(seurat_clusters) %>%
#     mutate(mean_z=mean(zscore)) %>% 
#     mutate(sd_z = sd(zscore)/sqrt(n)) %>% 
#     select(seurat_clusters, mean_z, sd_z, n) %>% arrange(mean_z)
# head(dot.dat)
# ggplot(dot.dat, aes(x = seurat_clusters, y = mean_z)) + geom_point() + geom_linerange(aes(ymin = mean_z-sd_z, ymax = mean_z+sd_z))
A grouped_df: 6 × 7
IL2RAseurat_clustersdonorIDMeannSDzscore
<dbl><fct><chr><dbl><int><dbl><dbl>
1.10128615300_03020.619952113050.022935069 20.9868108
1.04377811300_03020.715771734790.014337075 22.8782002
0.90903277300_03020.9241466 2470.064771527 -0.2333423
0.00000003300_03020.209471229720.009341831-22.4229311
0.97241825300_03020.619952113050.022935069 15.3679955
1.79901840300_03020.366833348290.009074467157.8258163
In [106]:
collapsed.counts <- presto::collapse_counts(merged@assays$RNA@counts, 
                        merged@meta.data, 
                        c("seurat_clusters", "donorID"))
dat.plot <- collapsed.counts$meta_data %>% tibble::rownames_to_column() %>% 
    left_join(as.data.frame(collapsed.counts$exprs_norm["IL2RA",]) %>% tibble::rownames_to_column(), by = c("rowname" ="rowname")) %>% 
    tibble::column_to_rownames("rowname") %>% 
    rename("IL2RA" = 'collapsed.counts$exprs_norm["IL2RA", ]')
dat.plot%>% head

ggplot(dat.plot, aes(x = seurat_clusters, y = IL2RA)) + geom_boxplot()
CAREFUL: get_norm makes very strong assumptions about data

A data.frame: 6 × 5
seurat_clustersdonorIDNlogUMIIL2RA
<fct><chr><int><dbl><dbl>
sample_2055300_03022311.7797411.4569859
sample_2011300_0302 610.3005840.6973401
sample_2077300_0302 1 8.8170010.9090327
sample_2033300_0302 810.6798490.8671973
sample_2000300_0302 2 9.3743281.6570391
sample_2066300_0302 2 9.1730540.0000000
In [107]:
dat.plot <- collapsed.counts$meta_data %>% tibble::rownames_to_column() %>% 
    left_join(as.data.frame(collapsed.counts$exprs_norm["FOXP3",]) %>% tibble::rownames_to_column(), by = c("rowname" ="rowname")) %>% 
    tibble::column_to_rownames("rowname") %>% 
    rename("FOXP3" = 'collapsed.counts$exprs_norm["FOXP3", ]')
dat.plot%>% head

ggplot(dat.plot, aes(x = seurat_clusters, y = FOXP3)) + geom_boxplot()
A data.frame: 6 × 5
seurat_clustersdonorIDNlogUMIFOXP3
<fct><chr><int><dbl><dbl>
sample_2055300_03022311.7797411.0157278
sample_2011300_0302 610.3005840.9860676
sample_2077300_0302 1 8.8170011.9355253
sample_2033300_0302 810.6798490.7655572
sample_2000300_0302 2 9.3743281.2658927
sample_2066300_0302 2 9.1730540.0000000
In [108]:
fig.size(3, 12)
FeaturePlot(merged, features = c("TCF7", "LEF1", "BACH2", "SATB1"), ncol = 4)
FeaturePlot(merged, features = c("PRDM1", "MAF", "IKZF3", "IKZF2"), ncol = 4)
FeaturePlot(merged, features = c("CCR7", "IL6R", "SELL", "S1PR1"), ncol = 4)
In [109]:
c0.c1.c2 <- wilcoxauc(merged, groups_use = c(0,1,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

c0.c1.c2 %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.5) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> c0.c1.c2.show
c0.c1.c2.show[1:30,]
A tibble: 30 × 4
row012
<int><chr><chr><chr>
1TCF7 CD74 S100A4
2CCR7 S100A4 CRIP1
3EEF1B2 HLA-DRB1IFITM1
4GAS5 IL32 S100A11
5PRKCQ-AS1HLA-DRB5CRIP2
6RPS12 HLA-DPA1SH3BGRL3
7SNHG29 CRIP1 ZFP36L2
8GIMAP7 HLA-DPB1ANXA1
9RPS5 LGALS1 PTGER2
10RPS13 ANXA2 KLF6
11RPL32 SH3BGRL3TIGIT
12APOO CYTOR IFITM2
13GIMAP5 S100A10 GSTK1
14RPS14 CLIC1 IL32
15PABPC1 SRGN ITGB1
16RPL5 S100A6 S100A10
17RPL38 COTL1 AL136456.1
18RPS6 HLA-DRA DUSP1
19TXK LGALS3 ITGA4
20RPL30 HLA-A TAGLN2
21RPL35A ITGB1 JUNB
22RPS21 GBP5 RPS26
23RPL39 MYL6 PFN1
24RPS8 ALOX5AP FCRL3
25FOXP1 ARPC1B GAPDH
26MT-ATP8 EZR EMP3
27RPS20 EMP3 GPR183
28LEF1 AHNAK VIM
29RPLP2 LSP1 GATA3
30RPL22 VIM RPL36A
In [110]:
c0.c2 <- wilcoxauc(merged, groups_use = c(0,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

c0.c2 %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> c0.c2.show
c0.c2.show[1:30,]
A tibble: 30 × 3
row02
<int><chr><chr>
1CCR7 S100A4
2TCF7 CRIP1
3PRKCQ-AS1IL32
4GAS5 SH3BGRL3
5EEF1B2 S100A11
6FOXP1 S100A10
7TGFBR2 ITGB1
8TXK CD74
9RPS12 ANXA2
10RPS13 CLIC1
11SNHG29 EMP3
12PABPC1 SRGN
13RPS5 KLF6
14RPL31 VIM
15NPM1 AHNAK
16RPL32 ARPC1B
17RPS14 PFN1
18MALAT1 HLA-A
19RPL35A GAPDH
20RPL38 S100A6
21RPL5 COTL1
22RPL7 TAGLN2
23RPL30 EZR
24RPS23 MYL6
25RPS6 GSTK1
26RPL22 CD99
27RPLP2 YWHAB
28RPS3A LSP1
29RPS9 CFL1
30RPL37 ACTB
In [111]:
c2.c1 <- wilcoxauc(merged, groups_use = c(1,2), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

c2.c1 %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> c2.c1.show
c2.c1.show[1:20,]
A tibble: 20 × 3
row12
<int><chr><chr>
1CD74 EEF1B2
2HLA-DRB1IFITM1
3HLA-DRB5RPS12
4HLA-DPA1RPL36A
5HLA-DPB1RPL32
6LGALS1 RPS5
7HLA-DRA SNHG29
8IL32 RPS21
9CYTOR RPS8
10ANXA2 GIMAP7
11S100A4 TCF7
12LGALS3 RPL39
13GBP5 RPS6
14CTSC RPL10A
15CLIC1 GAS5
16ALOX5AP RPS29
17MT2A RPL5
18SRGN RPL3
19S100A6 RPS13
20FOXP3 RPLP0
In [112]:
c4.c5 <- wilcoxauc(merged, groups_use = c(4,5), group_by = "seurat_clusters")
options(repr.matrix.max.cols=150, repr.matrix.max.rows=200)

c4.c5 %>% 
    group_by(group) %>% 
    filter(padj < 0.05 & auc > 0.6) %>% 
    arrange(group, desc(logFC)) %>% 
    #slice_max(n = 40, order_by = logFC)%>% 
    dplyr::select(feature, group) %>% 
    mutate(row = row_number()) %>% 
    tidyr::pivot_wider(names_from = group, values_from = feature) -> c4.c5.show
c4.c5.show[1:20,]
A tibble: 20 × 3
row45
<int><chr><chr>
1CD74 FOS
2TNFRSF18 JUNB
3GAPDH JUN
4MT2A FOSB
5PKM DUSP1
6TNFRSF4 NR4A2
7LAG3 BTG2
8IL2RG CXCR4
9UCP2 ZNF331
10PGAM1 CD69
11HLA-DRB5 KLF2
12ENO1 YPEL5
13CTSC DNAJB1
14SPOCK2 RGCC
15MTRNR2L12TNFAIP3
16PFN1 ZFP36L2
17MYL6 KLF6
18LINC01943DNAJA1
19TNFRSF1B JUND
20ARPC1B RPS10
In [113]:
table(merged$seurat_clusters)
   0    1    2    3    4    5    6    7    8    9 
4829 3479 3074 2972 1745 1305  877  247  236  145 

Naming clusters¶

In [114]:
#naming clusters
cell.states <- merged$seurat_clusters
cell.states <- cell.states %>% if_else(.=="0", "Naive Treg - TCF7/CCR7", .) %>% 
                if_else(.=="2", "CD25-intermediate Treg", .) %>% 
                if_else(.=="1", "CD25-high Treg", .) %>% 
                if_else(.=="3", "AREG Treg", .) %>% 
                if_else(.=="4", "CD25-high CXCR6+ Treg", .) %>% 
                if_else(.=="5", "TNFa response Treg", .) %>%
                if_else(.=="6", "CD161+ memory Treg", .) %>% 
                if_else(.=="7", "ISG high Treg", .) %>%
                if_else(.=="8", "Proliferating", .) %>%
                if_else(.=="9", "GZM+ Treg", .) 

names(cell.states) <- rownames(merged@meta.data)
merged <- AddMetaData(merged, cell.states, "cell.states")
In [115]:
fig.size(10,12)
Idents(merged) <- "cell.states"
DimPlot(merged, group.by = "cell.states", label = T, label.box = T,
        shuffle = T, label.size = 5, pt.size = 0.5) + 
    theme(text = element_text(size = 20)) + scale_fill_tableau("Tableau 20") +
    scale_color_tableau("Tableau 20")
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
In [116]:
table(merged$cell.states, merged$tissue)
                        
                         PBMC.Ctrl PBMC.RA SFMC  Syn
  AREG Treg                    109      83   84 2696
  CD161+ memory Treg           492     172  112  101
  CD25-high CXCR6+ Treg         93      82  615  955
  CD25-high Treg              1500     958  416  605
  CD25-intermediate Treg      1746     903  118  307
  GZM+ Treg                     54      58   15   18
  ISG high Treg                 49      41   90   67
  Naive Treg - TCF7/CCR7      2911    1464   50  404
  Proliferating                 35      15   46  140
  TNFa response Treg            37      36  117 1115
In [117]:
fig.size(8,8)
sum.clust <- merged@meta.data %>% tibble::rownames_to_column(var = "cell.ID") %>% 
    dplyr::select(cell.ID, cell.states, tissue, orig.ident)
pt <- table(sum.clust$cell.states, sum.clust$tissue)
pt
pt %>%  data.frame() %>% setNames(c("cluster", "tissue", "Freq")) %>% 
    ggplot(aes(x = tissue, y = Freq, fill = cluster)) + scale_fill_tableau("Tableau 20") + theme_bw(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")

pt %>%  data.frame() %>% setNames(c("cluster", "tissue", "Freq")) %>% 
    ggplot(aes(x = cluster, y = Freq, fill = tissue)) + scale_fill_tableau("Tableau 20") + theme_bw(base_size = 20) + theme(axis.text.x = element_text(angle = 45, hjust = 1))-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")
                        
                         PBMC.Ctrl PBMC.RA SFMC  Syn
  AREG Treg                    109      83   84 2696
  CD161+ memory Treg           492     172  112  101
  CD25-high CXCR6+ Treg         93      82  615  955
  CD25-high Treg              1500     958  416  605
  CD25-intermediate Treg      1746     903  118  307
  GZM+ Treg                     54      58   15   18
  ISG high Treg                 49      41   90   67
  Naive Treg - TCF7/CCR7      2911    1464   50  404
  Proliferating                 35      15   46  140
  TNFa response Treg            37      36  117 1115

Saving final object¶

In [118]:
saveRDS(merged, "/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated.Tregs.rds")
# merged <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/integrated/integrated.Tregs.rds")